{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RfgPhypo6Y75"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4d11ifqws7I8"
   },
   "source": [
    "# DATA 준비"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TXd3kK0YLns4"
   },
   "source": [
    "DATA Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hido3susLSe8"
   },
   "outputs": [],
   "source": [
    "images = np.load('/content/drive/MyDrive/pharyngitis/data/images_to_numpy.npy') #npy파일 불러오기\n",
    "labels = labels = [1 if i <= 147 else 0 for i in range(1, 363)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7LPspOeHh_Jx"
   },
   "outputs": [],
   "source": [
    "#class imbalace 해결하기 위한 class weight 계산\n",
    "num_0 = 215 #no_pharyngitis, label=1\n",
    "num_1 = 147 #pharyngitis, label=0\n",
    "num_tot=num_0+num_1\n",
    "\n",
    "k = num_tot**2 / (2*num_0*num_1)\n",
    "weight_for_0 = k * (num_1 / num_tot)\n",
    "weight_for_1 = k * (num_0 / num_tot)\n",
    "\n",
    "class_weight = {0: weight_for_0, 1: weight_for_1}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "H7hrJiDtWgjw",
    "outputId": "aac8746d-0295-4da2-a82e-5d34a49252bc"
   },
   "outputs": [],
   "source": [
    "class_weight"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RmEf1k0-MG8Z"
   },
   "source": [
    "TF Dataset 으로 만들기"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pgk92uMTPro8"
   },
   "source": [
    "train, val split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eYp43n9OMGoX"
   },
   "outputs": [],
   "source": [
    "dataset1 = tf.data.Dataset.from_tensor_slices((images, labels))\n",
    "shuffled_dataset = dataset1.cache().shuffle(10000)\n",
    "# 데이터셋 크기\n",
    "dataset_size = len(shuffled_dataset)\n",
    "\n",
    "# 분할 비율 설정\n",
    "train_ratio = 0.8\n",
    "val_ratio = 0.2\n",
    "\n",
    "# 분할 크기 계산\n",
    "train_size = int(dataset_size * train_ratio)\n",
    "val_size = int(dataset_size * val_ratio)\n",
    "\n",
    "# 데이터셋 분할\n",
    "train_dataset = shuffled_dataset.take(train_size)\n",
    "val_dataset = shuffled_dataset.skip(train_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "useocwRHNouU",
    "outputId": "23092f74-89d7-4737-ad9e-c9e4261b6a24"
   },
   "outputs": [],
   "source": [
    "print('train_size:',train_size)\n",
    "print('val_size:', dataset_size - train_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xbKJVwISPEaP"
   },
   "outputs": [],
   "source": [
    "#augmentation layer\n",
    "data_augmentation = tf.keras.models.Sequential(\n",
    "  [\n",
    "    tf.keras.layers.RandomContrast(factor=(0.1, 0.1)),\n",
    "    tf.keras.layers.RandomRotation(0.05),\n",
    "    tf.keras.layers.RandomZoom(0.05),\n",
    "  ]\n",
    ")\n",
    "\n",
    "def augment_images(image, label):\n",
    "  image = data_augmentation(image)\n",
    "  return image, label\n",
    "\n",
    "aug_train_dataset = train_dataset.map(augment_images)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YOVExqioMUXW"
   },
   "outputs": [],
   "source": [
    "AUTOTUNE = tf.data.AUTOTUNE\n",
    "train_dataset = train_dataset.cache().shuffle(10000).batch(16).prefetch(buffer_size=AUTOTUNE)\n",
    "aug_train_dataset = aug_train_dataset.cache().shuffle(10000).batch(16).prefetch(buffer_size=AUTOTUNE)\n",
    "val_dataset = val_dataset.cache().batch(16).prefetch(buffer_size=AUTOTUNE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "sapR8WuWdWYg",
    "outputId": "5da6618b-22fd-47bb-d0b6-94f203329090"
   },
   "outputs": [],
   "source": [
    "#val dataset의 구성 확인(혹시 모두 0은 아닌지 확인)\n",
    "val_labels = []\n",
    "for _, label in val_dataset:\n",
    "    val_labels.extend(np.array(label))\n",
    "\n",
    "\n",
    "num_zeros = val_labels.count(0)\n",
    "num_ones = val_labels.count(1)\n",
    "\n",
    "print(\"Number of zeros:\", num_zeros)\n",
    "print(\"Number of ones:\", num_ones)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_Hx0ggnntLat"
   },
   "source": [
    "# Model 만들기"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TcV2Lmf7UX6D"
   },
   "source": [
    "Model 2 만들기"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "znwGwa9MkgP-"
   },
   "outputs": [],
   "source": [
    "def get_model_2_p2(input_shape=(224, 224, 3)):\n",
    "    resnet50_without_top_layer = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))\n",
    "\n",
    "    for layer in resnet50_without_top_layer.layers[:160]:\n",
    "        layer.trainable = False\n",
    "\n",
    "    x = tf.keras.layers.GlobalAveragePooling2D()(resnet50_without_top_layer.output)\n",
    "    x = tf.keras.layers.Dropout(0.2)(x)\n",
    "    x =  tf.keras.layers.Dense(16, activation='sigmoid')(x)\n",
    "    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)\n",
    "\n",
    "    model = tf.keras.Model(inputs=resnet50_without_top_layer.input, outputs=output)\n",
    "    return model\n",
    "\n",
    "def get_model_2(input_shape=(224, 224, 3)):\n",
    "    model_2_p2 = get_model_2_p2(input_shape=(224, 224, 3))\n",
    "\n",
    "    inputs = tf.keras.Input(shape=input_shape)\n",
    "    h = tf.keras.applications.resnet.preprocess_input(inputs) #resnet50이 요구하는 형태로 image 변형\n",
    "    outputs = model_2_p2(h)\n",
    "\n",
    "    model = tf.keras.Model(inputs=inputs, outputs=outputs)\n",
    "\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ul9VqjLiUvt2",
    "outputId": "521ff058-f853-4911-b4d6-460b7c9acf60"
   },
   "outputs": [],
   "source": [
    "get_model_2().summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "2UE9PW77t0bw"
   },
   "source": [
    "# Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Wy2r-RE4VZHv",
    "outputId": "02e824bd-2339-4b25-b142-aa5af3491d8c"
   },
   "outputs": [],
   "source": [
    "model = get_model_2()\n",
    "model.compile(\n",
    "    optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, decay=1e-6, momentum= 0.9, nesterov = True)\n",
    "    ,loss='binary_crossentropy',\n",
    "    metrics=['accuracy'])\n",
    "\n",
    "callback = tf.keras.callbacks.EarlyStopping(\n",
    "    monitor='loss',\n",
    "    patience=10,\n",
    "    restore_best_weights=True\n",
    ")\n",
    "\n",
    "history = model.fit(train_dataset, epochs=60, callbacks = callback, validation_data=val_dataset, class_weight = class_weight) #class_weight = class_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 564
    },
    "id": "QQz1lclPWEXy",
    "outputId": "99cf006e-1f4e-4cf2-cdb4-f8852366f4a3"
   },
   "outputs": [],
   "source": [
    "#loss and accuracy graph\n",
    "fig, axs = plt.subplots(1, 2 ,figsize=(12, 6))\n",
    "\n",
    "# loss 그래프\n",
    "axs[0].plot(history.history['loss'], label='Train Loss')\n",
    "axs[0].plot(history.history['val_loss'], label='Validation Loss')\n",
    "axs[0].set_title('Training and Validation Loss')\n",
    "axs[0].set_xlabel('Epoch')\n",
    "axs[0].set_ylabel('Loss')\n",
    "axs[0].legend()\n",
    "\n",
    "# accuracy 그래프\n",
    "axs[1].plot(history.history['accuracy'], label='Train Accuracy')\n",
    "axs[1].plot(history.history['val_accuracy'], label='Validation Accuracy')\n",
    "axs[1].set_title('Training and Validation Accuracy')\n",
    "axs[1].set_xlabel('Epoch')\n",
    "axs[1].set_ylabel('Accuracy')\n",
    "axs[1].legend()\n",
    "\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "y3i1BKGKW7u2"
   },
   "source": [
    "모델 prediction 확인"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 589
    },
    "id": "7orWZP9VXjad",
    "outputId": "28847c47-dbf3-484f-8464-370581576518"
   },
   "outputs": [],
   "source": [
    "random_indices = np.random.choice(len(images), size=10, replace=False)\n",
    "fig, axes = plt.subplots(2, 5, figsize=(12, 6))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for i, idx in enumerate(random_indices):\n",
    "    image = images[idx]\n",
    "    prediction = float(model(tf.expand_dims(image, axis=0)))\n",
    "    axes[i].imshow(image)\n",
    "    axes[i].axis('off')\n",
    "    img_title = 'Image #{}\\nPharyngitis Prediction: {:.4f}\\nAnswer: {}'.format(idx+1, prediction, labels[idx])\n",
    "    axes[i].set_title(img_title, fontsize=10, pad=5)  # Set the title with the image index\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "j1Bri3S3RLj6"
   },
   "source": [
    "모델이 틀리게 예측한 이미지들 확인"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "oYKBptJXRKYe",
    "outputId": "36715236-e57e-4286-d9a7-962b4b72bacf"
   },
   "outputs": [],
   "source": [
    "# 잘못 예측한 이미지 확인\n",
    "misclassified_images = []\n",
    "for batch_images, batch_labels in val_dataset:\n",
    "    batch_size = batch_images.shape[0]\n",
    "\n",
    "    for i in range(batch_size):\n",
    "        image = tf.expand_dims(batch_images[i], axis=0)\n",
    "        label = batch_labels[i]\n",
    "\n",
    "        predicted_probabilities = float(model.predict(image, verbose=0))\n",
    "        if predicted_probabilities>=0.5:\n",
    "            predicted_label = 1\n",
    "        else:\n",
    "            predicted_label = 0\n",
    "\n",
    "        if predicted_label != label.numpy():\n",
    "            misclassified_images.append((image[0], label.numpy(), predicted_probabilities))\n",
    "#틀린 개수\n",
    "print('# wrong:', len(misclassified_images))\n",
    "\n",
    "# 잘못 예측한 이미지 시각화\n",
    "for i in range(len(misclassified_images)):\n",
    "    image, true_label, predicted_label = misclassified_images[i]\n",
    "    plt.imshow(image, cmap='gray')  # 이미지는 흑백으로 가정\n",
    "    plt.title(f'True Label: {true_label}, Prediction Value: {predicted_label}')\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OJvrmXxBvKs3"
   },
   "source": [
    "# Save model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "oP8UQ6TuWahB"
   },
   "source": [
    "모델 .h5으로 저장"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OZa2FFWJWcYW"
   },
   "outputs": [],
   "source": [
    "# 모델 저장\n",
    "model_dir =  '/content/drive/MyDrive/pharyngitis/models'\n",
    "model_name = 'some_resnet50_included_onlylastlayer_WithAugmentation_0723_1453.h5'\n",
    "model_path = os.path.join(model_dir, model_name)\n",
    "\n",
    "model.save(model_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rHqkAdexvPZ6"
   },
   "source": [
    "# Load model and test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 364
    },
    "id": "NCcm31AdvXJs",
    "outputId": "abb6516f-0825-4d54-d706-5db81063b785"
   },
   "outputs": [],
   "source": [
    "model_name = 'some_resnet50_included_onlylastlayer_noAugmentation_0723_1445.h5'\n",
    "model_load_path = os.path.join(model_dir, model_name)\n",
    "\n",
    "loaded_model = tf.keras.models.load_model(model_load_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uM6q4Y9YSHm3"
   },
   "outputs": [],
   "source": [
    "loaded_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cTUagCRKPf6_"
   },
   "outputs": [],
   "source": [
    "results = loaded_model.evaluate(val_dataset)\n",
    "\n",
    "print(\"Val Loss:\", results[0])\n",
    "print(\"Val Accuracy:\", results[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "On5KcpScQ7RY"
   },
   "source": [
    "loaded_model 결과 확인"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jxmQw1_DQvr0"
   },
   "outputs": [],
   "source": [
    "random_indices = np.random.choice(len(images), size=10, replace=False)\n",
    "fig, axes = plt.subplots(2, 5, figsize=(12, 6))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for i, idx in enumerate(random_indices):\n",
    "    image = images[idx]\n",
    "    prediction = float(model(tf.expand_dims(image, axis=0)))\n",
    "    axes[i].imshow(image)\n",
    "    axes[i].axis('off')\n",
    "    img_title = 'Image #{}\\nPharyngitis Prediction: {:.4f}\\nAnswer: {}'.format(idx+1, prediction, labels[idx])\n",
    "    axes[i].set_title(img_title, fontsize=10, pad=5)  # Set the title with the image index\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JdXY-A2KQoXj"
   },
   "outputs": [],
   "source": [
    "# 잘못 예측한 이미지 확인\n",
    "misclassified_images = []\n",
    "for batch_images, batch_labels in val_dataset:\n",
    "    batch_size = batch_images.shape[0]\n",
    "\n",
    "    for i in range(batch_size):\n",
    "        image = tf.expand_dims(batch_images[i], axis=0)\n",
    "        label = batch_labels[i]\n",
    "\n",
    "        predicted_probabilities = float(loaded_model.predict(image, verbose=0))\n",
    "        if predicted_probabilities>=0.5:\n",
    "            predicted_label = 1\n",
    "        else:\n",
    "            predicted_label = 0\n",
    "\n",
    "        if predicted_label != label.numpy():\n",
    "            misclassified_images.append((image[0], label.numpy(), predicted_probabilities))\n",
    "#틀린 개수\n",
    "print('# wrong:', len(misclassified_images))\n",
    "\n",
    "# 잘못 예측한 이미지 시각화\n",
    "for i in range(len(misclassified_images)):\n",
    "    image, true_label, predicted_label = misclassified_images[i]\n",
    "    plt.imshow(image, cmap='gray')  # 이미지는 흑백으로 가정\n",
    "    plt.title(f'True Label: {true_label}, Prediction Value: {predicted_label}')\n",
    "    plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
